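# Standard-deviation (3-sigma) anomaly detection.
# In statistics, if data is approximately normally distributed, about 99.7% of
# the values lie within three standard deviations of the mean.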
import numpy as np
import xlrd  #读Excel数据用
import matplotlib.pyplot as plt
import random # 检验用，用于生成随机数


def find_anomalies(data_in, n_sigma=3):
    """Return the values of ``data_in`` outside mean +/- ``n_sigma`` std devs.

    The mean and standard deviation are computed with ``np.mean`` and
    ``np.std`` (NumPy's default population standard deviation) over all of
    ``data_in``. The resulting lower and upper limits are printed before the
    outliers are collected.

    Args:
        data_in: Sized collection of numeric values (e.g. a list or a 1-D
            array).
        n_sigma: Half-width of the accepted range, expressed in standard
            deviations. Defaults to 3 (the classic 3-sigma rule).

    Returns:
        A list with the elements of ``data_in`` that are strictly below the
        lower limit or strictly above the upper limit, in their original
        order. An empty list is returned for empty input.
    """
    # np.mean / np.std on empty input emit RuntimeWarnings and yield nan
    # limits; there is nothing to flag, so return early instead.
    if len(data_in) == 0:
        return []

    data_std = np.std(data_in)
    data_mean = np.mean(data_in)
    spread = data_std * n_sigma

    lower_limit = data_mean - spread
    upper_limit = data_mean + spread
    print('范围下限为：')
    print(lower_limit)
    print('范围上限为：')
    print(upper_limit)

    # Keep the original elements (not NumPy copies) so callers get back the
    # same objects they passed in.
    return [
        value
        for value in data_in
        if value > upper_limit or value < lower_limit
    ]

# Apply the 3-sigma rule to one column of the Excel workbook.
# Raw string literal: the Windows path contains backslashes, and sequences
# such as "\服" are invalid escapes in a normal string literal (newer Python
# versions warn about them). The runtime value of the path is unchanged.
file_location = r"D:\服创\数数数学学学建建建模模模\鸢尾花X数据.xlsx"
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx may
# require xlrd < 2.0 or a different reader -- confirm the installed version.
data = xlrd.open_workbook(file_location)  # workbook loaded from the Excel file
sheet = data.sheet_by_index(0)  # first worksheet of the workbook
# Read column index 1 (0-based, i.e. the second column) from every row except
# row 0 -- presumably row 0 is a header row; verify against the spreadsheet.
days = [sheet.cell_value(r, 1) for r in range(1, sheet.nrows)]
print(days)

error = find_anomalies(days)
print("异常点为")
print(error)
